library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
source("/home/guestA/n70275b/work/rscripts/geomNorm.R")
# Helper function
#ggpoints <- function(x,...)
# ggplot(x,...) + geom_point(size=3,stroke=1) +
# ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor
## ラベルあり
ggpoints <- function(x,...)
ggplot(x,...) + geom_point(stroke=1) +
ggrepel::geom_text_repel(size=4) + theme_minimal() + mycolor
## ラベルなし
#ggpoints <- function(x,...)
# ggplot(x,...) + geom_point(stroke=1) + theme_minimal() + mycolor
print(Sys.Date())
[1] "2020-09-15"
print(sessionInfo(),locale=FALSE)
R version 4.0.1 (2020-06-06)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux
Matrix products: default
BLAS/LAPACK: /usr/local/intel2018_up1/compilers_and_libraries_2018.0.128/linux/mkl/lib/intel64_lin/libmkl_intel_lp64.so
attached base packages:
[1] grid stats4 parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] corrplot_0.84 Hmisc_4.4-1 Formula_1.2-3 survival_3.2-3
[5] lattice_0.20-41 stringr_1.4.0 hrbrthemes_0.8.0 ggrepel_0.8.2
[9] ggpubr_0.4.0.999 gplots_3.0.4 DESeq2_1.28.1 GGally_2.0.0
[13] vcd_1.4-7 BiocParallel_1.22.0 Matrix_1.2-18 SummarizedExperiment_1.18.2
[17] DelayedArray_0.14.1 matrixStats_0.56.0 motifmatchr_1.10.0 org.Mm.eg.db_3.11.4
[21] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0 org.Hs.eg.db_3.11.4 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 GenomicFeatures_1.40.1
[25] AnnotationDbi_1.50.3 Biobase_2.48.0 ChIPseeker_1.24.0 clusterProfiler_3.16.1
[29] BSgenome.Mmusculus.UCSC.mm10_1.4.0 ggsignif_0.6.0 chromVAR_1.10.0 purrr_0.3.4
[33] RColorBrewer_1.1-2 ggsci_2.9 readr_1.3.1 tidyr_1.1.2
[37] dplyr_1.0.2 ggplot2_3.3.2 TFBSTools_1.26.0 BSgenome_1.56.0
[41] rtracklayer_1.48.0 Biostrings_2.56.0 XVector_0.28.0 GenomicRanges_1.40.0
[45] GenomeInfoDb_1.24.2 IRanges_2.22.2 S4Vectors_0.26.1 BiocGenerics_0.34.0
loaded via a namespace (and not attached):
[1] rappdirs_0.3.1 R.methodsS3_1.8.1 bit64_4.0.5 knitr_1.29 irlba_2.3.3 R.utils_2.10.1
[7] rpart_4.1-15 data.table_1.13.0 KEGGREST_1.28.0 RCurl_1.98-1.2 generics_0.0.2 cowplot_1.1.0
[13] lambda.r_1.2.4 RSQLite_2.2.0 europepmc_0.4 bit_4.0.4 enrichplot_1.8.1 xml2_1.3.2
[19] httpuv_1.5.4 assertthat_0.2.1 DirichletMultinomial_1.30.0 viridis_0.5.1 xfun_0.17 hms_0.5.3
[25] evaluate_0.14 promises_1.1.1 fansi_0.4.1 progress_1.2.2 caTools_1.18.0 dbplyr_1.4.4
[31] readxl_1.3.1 igraph_1.2.5 DBI_1.1.0 geneplotter_1.66.0 htmlwidgets_1.5.1 futile.logger_1.4.3
[37] reshape_0.8.8 ellipsis_0.3.1 backports_1.1.9 annotate_1.66.0 biomaRt_2.44.1 vctrs_0.3.4
[43] abind_1.4-5 withr_2.2.0 ggforce_0.3.2 triebeard_0.3.0 checkmate_2.0.0 GenomicAlignments_1.24.0
[49] prettyunits_1.1.1 cluster_2.1.0 DOSE_3.14.0 lazyeval_0.2.2 seqLogo_1.54.3 crayon_1.3.4
[55] genefilter_1.70.0 labeling_0.3 pkgconfig_2.0.3 tweenr_1.0.1 nlme_3.1-149 nnet_7.3-14
[61] rlang_0.4.7 lifecycle_0.2.0 miniUI_0.1.1.1 downloader_0.4 extrafontdb_1.0 BiocFileCache_1.12.1
[67] cellranger_1.1.0 polyclip_1.10-0 lmtest_0.9-38 urltools_1.7.3 carData_3.0-4 boot_1.3-25
[73] zoo_1.8-8 base64enc_0.1-3 pheatmap_1.0.12 ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0
[79] bitops_1.0-6 R.oo_1.24.0 KernSmooth_2.23-17 blob_1.2.1 qvalue_2.20.0 jpeg_0.1-8.1
[85] rstatix_0.6.0 gridGraphics_0.5-0 CNEr_1.24.0 scales_1.1.1 memoise_1.1.0 magrittr_1.5
[91] plyr_1.8.6 gdata_2.18.0 zlibbioc_1.34.0 compiler_4.0.1 scatterpie_0.1.5 plotrix_3.7-8
[97] Rsamtools_2.4.0 cli_2.0.2 htmlTable_2.0.1 formatR_1.7 mgcv_1.8-33 MASS_7.3-53
[103] tidyselect_1.1.0 stringi_1.5.3 forcats_0.5.0 yaml_2.2.1 GOSemSim_2.14.2 askpass_1.1
[109] locfit_1.5-9.4 latticeExtra_0.6-29 fastmatch_1.1-0 tools_4.0.1 rio_0.5.16 rstudioapi_0.11
[115] TFMPvalue_0.0.8 foreign_0.8-80 gridExtra_2.3 farver_2.0.3 ggraph_2.0.3 digest_0.6.25
[121] rvcheck_0.1.8 BiocManager_1.30.10 FNN_1.1.3 shiny_1.5.0 pracma_2.2.9 Rcpp_1.0.5
[127] car_3.0-9 broom_0.7.0 later_1.1.0.1 gdtools_0.2.2 httr_1.4.2 colorspace_1.4-1
[133] XML_3.99-0.5 splines_4.0.1 uwot_0.1.8 graphlayouts_0.7.0 ggplotify_0.0.5 systemfonts_0.3.1
[139] plotly_4.9.2.1 xtable_1.8-4 jsonlite_1.7.1 futile.options_1.0.1 poweRlaw_0.70.6 tidygraph_1.2.0
[145] R6_2.4.1 pillar_1.4.6 htmltools_0.5.0 mime_0.9 cpp11_0.2.1 glue_1.4.2
[151] fastmap_1.0.1 DT_0.15 fgsea_1.14.0 utf8_1.1.4 tibble_3.0.3 curl_4.3
[157] gtools_3.8.2 zip_2.1.1 GO.db_3.11.4 openxlsx_4.1.5 openssl_1.4.2 Rttf2pt1_1.3.8
[163] rmarkdown_2.3 munsell_0.5.0 DO.db_2.9 GenomeInfoDbData_1.2.3 msigdbr_7.1.1 haven_2.3.1
[169] reshape2_1.4.4 gtable_0.3.0 extrafont_0.17
select <- dplyr::select
rename <- dplyr::rename #191203
count <- dplyr::count #191203
modify here
# Files
deftable <- "/home/guestA/o70578a/akuwakado/kuwakado/scCELSeq2/Iwasaki_0386_C2C12_H3mm18/Final_Last_Rserver_200523/iwasaki_0386_noumi_def_fin191203__200523ver.txt" #最終版 121203
#deftable <- "/home/guestA/o70578a/akuwakado/kuwakado/scCELSeq2/Iwasaki_0386_C2C12_H3mm18/iwasaki_0386_umi_def_fin191203ver.txt" #最終版 121203
#deftable <- "/home/guestA/o70578a/akuwakado/kuwakado/scCELSeq2/Iwasaki_0386_C2C12_H3mm18/iwasaki_0386_umi_def.txt"
#deftable <- "deftable_BRB_umi_new.txt"
## Data selection (filter rows of deftable)
#use <- quo(!grepl("^18",group) & (group != "Nc-minusTryd"))
#use <- quo(TRUE) # use all
use <- quo(group %in% c("eGFP_G","mm18B_G","eGFP_D72", "mm18B_D72"))
#use <- quo(type == "C2C12")
#use <- quo(type != "C2C12")
#use <- quo(TRUE)
#use <- quo(type == "Whole_cell")
#use <- quo(type == "Nucleus")
# Species specific parameters
species <- "Mus musculus"
biomartann <- "mmusculus_gene_ensembl"
maxchrom <- 19 # 19: mouse, 22: human
# Graphics
# aesthetic mapping of labels
#myaes <- aes(colour=enzyme,shape=leg,label=rep)
#myaes <- aes(colour=growth,shape=type,size=count) #ラベルなし
#myaes <- aes(colour=growth,shape=type,label=replicate,size=count) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=replicate) #ラベルあり
#myaes <- aes(colour=enzyme,shape=leg,label=factor(rep))
#myaes <- aes(colour=type,shape=trypsin,label=factor(lot))
#myaes <- aes(colour=trypsin,label=factor(lot))
myaes <- aes(colour=time,shape=type,label=factor(lot))
# color palette of points: See vignette("ggsci")
#mycolor <- ggsci::scale_color_aaas()
mycolor <- ggsci::scale_color_d3("category20") # color palette of points
# PCA/UMAP
scalerows <- TRUE # gene-wise scaling (pattern is the matter?)
ntop <- 500 # number of top-n genes with high variance
seed <- 123 # set another number if UMAP looks not good
n_nei <- 2 #6 # number of neighboring data points in UMAP
#hashigushi
scalerows <- FALSE # gene-wise scaling (pattern is the matter?)
#seed <- 123 # set another number if tSNE looks not good
#perprexity <- 3 # expected cluster size in tSNE
# DESeq2
#model <- ~type+trypsin
#model <- ~trypsin
model <- ~group
fdr <- 0.1 # acceptable false discovery rate
lfcthreth <- log2(1) # threshold in abs(log2FC)
# controls should be placed in the right
contrast <- list(
#time_D72_vs_G = c("time", "D72", "G")
#group_G_H3f3b_vs_WT = c("group", "H3f3b_G", "WT_G"),
#group_G_mm18B_vs_WT = c("group", "mm18B_G", "WT_G"),
#group_G_eGFP_vs_WT = c("group", "eGFP_G", "WT_G"),
#group_G_H3f3b_vs_eGFP = c("group", "H3f3b_G", "eGFP_G"),
group_G_mm18B_vs_eGFP = c("group", "mm18B_G", "eGFP_G"),
#group_G_mm18B_vs_H3f3b = c("group", "mm18B_G", "H3f3b_G"),
#group_D72_H3f3b_vs_WT = c("group", "H3f3b_D72", "WT_D72"),
#group_D72_mm18B_vs_WT = c("group", "mm18B_D72", "WT_D72"),
#group_D72_eGFP_vs_WT = c("group", "eGFP_D72", "WT_D72"),
#group_D72_H3f3b_vs_eGFP = c("group", "H3f3b_D72", "eGFP_D72"),
group_D72_mm18B_vs_eGFP = c("group", "mm18B_D72", "eGFP_D72"),
#group_D72_mm18B_vs_H3f3b = c("group", "mm18B_D72", "H3f3b_D72")
#group_WT_D72_vs_G = c("group", "WT_D72", "WT_G"),
group_eGFP_D72_vs_G = c("group", "eGFP_D72", "eGFP_G"),
#group_H3f3b_D72_vs_G = c("group", "H3f3b_D72", "H3f3b_G"),
group_mm18B_D72_vs_G = c("group", "mm18B_D72", "mm18B_G")
#type = c("type", "Nucleus", "Whole_cell"),
#trypsin = c("trypsin", "plus", "untreated")
#Intercept = list("Intercept"), # reference level
#leg_LvsR = c("leg", "L", "R"),
#enz_KvsC = c("enzyme","K","C")
#legL.enzK = list("legL.enzymeK") # interaction
#type_Doxplus_vs_minus = c("type", "Doxplus", "Doxminus")
)
if(!exists("e2g")){
ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="asia.ensembl.org")
#ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="uswest.ensembl.org")
#ensembl <- biomaRt::useMart("ENSEMBL_MART_ENSEMBL",host="useast.ensembl.org")
mart <- biomaRt::useDataset(biomartann,mart=ensembl)
e2g <- biomaRt::getBM(attributes=c("ensembl_gene_id","external_gene_name",
"gene_biotype","chromosome_name"), mart=mart) %>% as_tibble %>%
rename(
ens_gene = ensembl_gene_id,
ext_gene = external_gene_name,
biotype = gene_biotype,
chr = chromosome_name
)
}
annotate <- partial(right_join,e2g,by="ens_gene")
#-----#
nrow(e2g)
[1] 56305
readr::write_csv(e2g,"ensemble_list_asia__fin200915.csv")
#readr::write_csv(e2g,"ensemble_list_uswest_fin200523.csv.csv")
##readr::write_csv(e2g,"ensemble_list_useast.csv")
#e2g <- readr::read_csv("/home/guestA/o70578a/akuwakado/kuwakado/scCELSeq2/Iwasaki_0386_C2C12_H3mm18/Final_Last_Rserver_200523/ensemble_list_uswest_fin200523.csv.csv")
#annotate <- partial(right_join,e2g,by="ens_gene")
#nrow(e2g)
def <- readr::read_tsv(deftable) %>% filter(!!use) %>% arrange(group,sample) #20200915 change
Parsed with column specification:
cols(
RunSampleID = col_character(),
SampleNo = col_character(),
file = col_character(),
sample = col_character(),
primer = col_double(),
i7indexID = col_character(),
index = col_character(),
flocell = col_character(),
lane = col_character(),
il_barcode = col_character(),
day = col_double(),
type = col_character(),
time = col_character(),
lot = col_character(),
barcode = col_character(),
group = col_character()
)
print(def)
readr::write_csv(def,"deftable_used_CEL0386noumi_C2C12_fin200915.csv")
####--- New ---#### (no UMI ?)
# Set reference levels according to the contrast
for(x in keep(contrast,is.character))
def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])
umi <- def$file %>% unique %>% tibble(file=.) %>%
dplyr::mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
unnest() %>% dplyr::rename(barcode=cell) %>%
dplyr::inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)
Parsed with column specification:
cols(
gene = col_character(),
cell = col_character(),
count = col_double()
)
Parsed with column specification:
cols(
gene = col_character(),
cell = col_character(),
count = col_double()
)
Parsed with column specification:
cols(
gene = col_character(),
cell = col_character(),
count = col_double()
)
`cols` is now required when using unnest().
Please use `cols = c(data)`
print(umi)
## sample, barcode, file を忘れずに!
mat <- umi %>% annotate %>%
dplyr::mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
filter(!is.na(chr)) %>% spread(sample,count,fill=0)
## to check read vias, this add read number as "n" column (2019/4/19)
def <- umi %>% count(sample,wt=count) %>% dplyr::inner_join(def,.) %>% dplyr::rename(count=n)
Joining, by = "sample"
####-----------####
#def <- readr::read_tsv(deftable) %>% filter(!!use)
####--- New ---#### (no UMI ?)
# Set reference levels according to the contrast
#for(x in keep(contrast,is.character))
# def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])
#umi <- def$file %>% unique %>% tibble(file=.) %>%
# dplyr::mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
# unnest() %>% dplyr::rename(barcode=cell) %>%
# inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
# select(-file,-barcode) %>% dplyr::rename(ens_gene=gene)
## sample, barcode, file を忘れずに!
#mat <- umi %>% annotate %>%
# dplyr::mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
# filter(!is.na(chr)) %>% spread(sample,count,fill=0)
## to check read vias, this add read number as "n" column (2019/4/19)
#def <- umi %>% count(sample,wt=count) %>% inner_join(def,.) %>% dplyr::rename(count=n)
####-----------####
# Old
# Set reference levels according to the contrast
#for(x in keep(contrast,is.character))
# def[[x[1]]] <- relevel(factor(def[[x[1]]]),x[3])
#umi <- def$file %>% unique %>% tibble(file=.) %>%
# mutate(data=map(file,readr::read_tsv,progress=FALSE)) %>%
# unnest() %>% rename(barcode=cell) %>%
# inner_join(select(def,file,barcode,sample),.,c("file","barcode")) %>%
# select(-file,-barcode) %>% rename(ens_gene=gene)
#mat <- umi %>% annotate %>%
# mutate(chr=factor(chr,c(1:maxchrom,"X","Y","MT"))) %>%
# filter(!is.na(chr)) %>% spread(sample,count,fill=0)
print(umi)
print(def)
bychr <- mat %>% select(-(1:3)) %>%
gather("sample","count",-chr) %>%
group_by(chr,sample) %>% summarise(total=sum(count)) %>% ungroup
`summarise()` regrouping output by 'chr' (override with `.groups` argument)
ggplot(bychr,aes(reorder(sample,dplyr::desc(sample)),total/1e6,fill=chr)) +
theme_linedraw() + geom_bar(stat="identity") + coord_flip() +
xlab("sample") + ylab("million reads") + ggsci::scale_fill_igv() +
scale_x_discrete(limits = rev(levels(sample)))
# 前
#bychr <- mat %>% select(-(1:3)) %>%
# gather("sample","count",-chr) %>%
# group_by(chr,sample) %>% summarise(total=sum(count)) %>% ungroup
#ggplot(bychr,aes(reorder(sample,desc(sample)),total/1e6,fill=chr)) +
# theme_linedraw() + geom_bar(stat="identity") + coord_flip() +
# xlab("sample") + ylab("million reads") + ggsci::scale_fill_igv() +
# scale_x_discrete(limits = rev(levels(sample)))
#bychr <- mat %>% select(-(1:3)) %>%
# gather("sample","count",-chr) %>%
# group_by(chr,sample) %>% summarise(total=sum(count)) %>% ungroup
#ggplot(bychr,aes(reorder(sample,dplyr::desc(sample)),total/1e6,fill=chr)) +
# theme_linedraw() + geom_bar(stat="identity") + coord_flip() +
# xlab("sample") + ylab("million reads") + ggsci::scale_fill_igv() +
# scale_x_discrete(limits = rev(levels(sample)))
bt <- mat %>% select(-c(1,2,4)) %>% group_by(biotype) %>%
summarise_all(sum) %>% filter_at(-1,any_vars(. > 1000))
bt %>% tibble::column_to_rownames("biotype") %>%
as.matrix %>% t %>% mosaicplot(las=2,shade=TRUE)
drop rows with all 0 -> +1/2 -> geom.scale -> log -> Pearson’s
matf <- mat %>% filter(chr!="MT") %>% filter_at(-(1:4),any_vars(. > 0))
X <- matf %>% select(-(1:4)) %>% as.matrix
rownames(X) <- matf$ens_gene
lX <- log(gscale(X+0.5))
R <- cor(lX); diag(R) <- NA
pheatmap::pheatmap(R,color=viridis::viridis(256))
# set scale=TRUE if the patterns (not level) is the matter
p <- prcomp(t(lX[rank(-apply(lX,1,var)) <= ntop,]),scale=scalerows,center=TRUE)
screeplot(p,las=2,main="Importance")
print(summary(p)$imp[,seq(min(10,ncol(X)))])
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
Standard deviation 28.36722 9.792015 5.42517 5.21355 5.041439 4.451113 3.907467 3.733946 3.455748 3.357516
Proportion of Variance 0.72181 0.086010 0.02640 0.02438 0.022800 0.017770 0.013700 0.012510 0.010710 0.010110
Cumulative Proportion 0.72181 0.807820 0.83422 0.85860 0.881400 0.899170 0.912870 0.925370 0.936080 0.946200
label <- def %>% filter(sample %in% colnames(X))
df <- data.frame(p$x) %>% as_tibble(rownames="sample") %>%
inner_join(label,.) %>% select(-file)
Joining, by = "sample"
print(df)
ggpoints(df,modifyList(aes(PC1,PC2),myaes))
ggpoints(df,modifyList(aes(PC2,PC3),myaes))
set.seed(seed)
um <- uwot::umap(p$x,n_nei,2)
df <- as_tibble(um) %>% rename(UMAP1=V1,UMAP2=V2) %>% bind_cols(df)
ggpoints(df,modifyList(aes(UMAP1,UMAP2),myaes))
print(df)
readr::write_csv(df,"CEL0386noumi_C2C12_Count_PCA_fin200915.csv")
dds <- DESeq2::DESeqDataSetFromMatrix(X[,label$sample],label,model)
converting counts to integer mode
dds <- DESeq2::DESeq(dds)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
#=====#
# 20191213追加
dds <- DESeq2::estimateSizeFactors(dds)
norm <- DESeq2::counts(dds,normalized=TRUE) #DEGを取った後のクラスタリングに使う。
normalizedcount <- as.data.frame(norm) %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
readr::write_csv(normalizedcount, "./CEL0386noumi_C2C12_H3mm18_normCount_fin200915.csv")
normalizedcount %>% inner_join(e2g, by = "ens_gene") %>% readr::write_csv("./CEL0386noumi_C2C12_normCount_genename_fin200915.csv")
####--- + size factors を書き出し ------------------####
as.data.frame(DESeq2::sizeFactors(dds)) %>% tibble::rownames_to_column("sample") %>% readr::write_csv("./CEL0386noumi_C2C12_H3mm18_sizefactors_fin200915_fin200915.csv")
as.data.frame(DESeq2::sizeFactors(dds)) %>% tibble::rownames_to_column("sample")
#count_dds <- estimateSizeFactors(dds)
#counts(count_dds, normalized=TRUE)
vst => z score (200914add)
## 20200914
vsd <- DESeq2::vst(dds) #normalized countが入っている。(vstかrlog)
Xd <- SummarizedExperiment::assay(vsd) # 全て選択(200326) 20190920を元に (191024)
Xs <- Xd %>% t %>% scale %>% t
vst_score <- Xd %>% as.data.frame() %>% tibble::rownames_to_column("ens_gene") %>% as_tibble #200909 add
vst_type <- vst_score %>% annotate %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample))
zscore <- Xs %>% as.data.frame() %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
zscore_type <- zscore %>% annotate %>% dplyr::select("ens_gene","ext_gene", "biotype","chr", all_of(label$sample))
readr::write_csv(vst_score, "CEL0386noumi_C2C12_H3mm18__vst_all_fin200915.csv") #200909 add
readr::write_csv(vst_type, "CEL0386noumi_C2C12_H3mm18__vst_type_all_fin200915.csv") #200909 add
readr::write_csv(zscore, "CEL0386noumi_C2C12_H3mm18__zscore_all_fin200915.csv")
readr::write_csv(zscore_type, "CEL0386noumi_C2C12_H3mm18__zscore_type_all_fin200915.csv")
colnames(zscore_type)
[1] "ens_gene" "ext_gene" "biotype" "chr" "180122-eGFP-D72-lot1" "180122-eGFP-D72-lot2" "180130-eGFP-D72-lot1"
[8] "180130-eGFP-D72-lot2" "180130-eGFP-D72-lot3" "180121-eGFP-G-lot1" "180121-eGFP-G-lot2" "180403-eGFP-G-lot1" "180403-eGFP-G-lot2" "180403-eGFP-G-lot3"
[15] "180122-mm18B-D72-lot1" "180122-mm18B-D72-lot2" "180130-mm18B-D72-lot1" "180130-mm18B-D72-lot2" "180130-mm18B-D72-lot3" "180121-mm18B-G-lot1" "180121-mm18B-G-lot2"
[22] "180403-mm18B-G-lot1" "180403-mm18B-G-lot2" "180403-mm18B-G-lot3"
ncol(vst_type)
[1] 24
ncol(zscore_type)
[1] 24
nrow(vst_type)
[1] 23695
nrow(zscore_type)
[1] 23695
coef_dds <- coef(dds) %>% as.data.frame() %>% tibble::rownames_to_column("ens_gene") %>% as_tibble
readr::write_csv(coef_dds, "CEL0386noumi_C2C12_H3mm18__coefs_fin200915.csv")
DESeq2::sizeFactors(dds) %>%
{tibble(sample=names(.),sizeFactor=.)} %>%
ggplot(aes(sample,sizeFactor)) + theme_minimal() +
geom_bar(stat="identity") + coord_flip()
DESeq2::plotDispEsts(dds)
#res <- mapply(function(x)
# DESeq2::results(dds,x,lfcThreshold=lfcthreth,alpha=fdr)
#,contrast)
res <- mapply(function(x)
DESeq2::results(dds,x,lfcThreshold=lfcthreth,alpha=fdr)
,contrast)
# 200914修正
re_all <- map(res,as_tibble,rownames="ens_gene") %>%
tibble(aspect=factor(names(.),names(.)),data=.) %>%
mutate(data=map(data,annotate)) %>%
unnest(cols = "data")
re <- re_all %>% filter(padj<fdr) #191120修正 unnest()
#re <- map(res,as_tibble,rownames="ens_gene") %>%
# tibble(aspect=factor(names(.),names(.)),data=.) %>%
# mutate(data=map(data,annotate)) %>%
# unnest(cols = "data") %>% filter(padj<fdr) #191120修正 unnest()
fc <- re %>% select(1:7) %>% spread(aspect,log2FoldChange,fill=0)
imap(res,~{
cat(paste0("-- ",.y," --"))
DESeq2::summary(.x) #191120修正 DESeq2::summary.DESeqResults(.x)
}) %>% invisible
-- group_G_mm18B_vs_eGFP --
out of 23695 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 112, 0.47%
LFC < 0 (down) : 220, 0.93%
outliers [1] : 6, 0.025%
low counts [2] : 11485, 48%
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
-- group_D72_mm18B_vs_eGFP --
out of 23695 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1036, 4.4%
LFC < 0 (down) : 1022, 4.3%
outliers [1] : 6, 0.025%
low counts [2] : 11485, 48%
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
-- group_eGFP_D72_vs_G --
out of 23695 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 2197, 9.3%
LFC < 0 (down) : 2054, 8.7%
outliers [1] : 6, 0.025%
low counts [2] : 11025, 47%
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
-- group_mm18B_D72_vs_G --
out of 23695 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1807, 7.6%
LFC < 0 (down) : 1447, 6.1%
outliers [1] : 6, 0.025%
low counts [2] : 10566, 45%
(mean count < 1)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
if(exists("fc")) readr::write_csv(fc,"./2gun/Iwasaki_0386re_C2C12_H3mm18_noumi_l2fc__final200915.csv")
if(exists("re")) readr::write_csv(re,"./2gun/Iwasaki_0386re_C2C12_H3mm18_noumi_results__final200915.csv")
if(exists("re_all")) readr::write_csv(re_all,"./2gun/Iwasaki_0386re_C2C12_H3mm18_noumi_resultsall__final200915.csv")
DESeqのlog2FCの計算について (coef_ddsから計算する方法) (20200915検証)
# log2FC
re %>% filter(aspect=="group_G_mm18B_vs_eGFP") %>% filter(ens_gene %in% c("ENSMUSG00000088252","ENSMUSG00000024754","ENSMUSG00000024646","ENSMUSG00000032741","ENSMUSG00000031885"))
re %>% filter(aspect=="group_D72_mm18B_vs_eGFP") %>% filter(ens_gene %in% c("ENSMUSG00000088252","ENSMUSG00000024754","ENSMUSG00000024646","ENSMUSG00000032741","ENSMUSG00000031885"))
# coefから再現 (引き算をするだけで良い)
dddddd <- coef_dds %>% filter(ens_gene %in% c("ENSMUSG00000088252","ENSMUSG00000024754","ENSMUSG00000024646","ENSMUSG00000032741","ENSMUSG00000031885"))
dddddd %>% mutate(-group_eGFP_G_vs_mm18B_G, group_mm18B_D72_vs_mm18B_G-group_eGFP_D72_vs_mm18B_G)
NA
## Growth ##
#maplot <- DESeq2::plotMA(res$group_G_mm18B_vs_eGFP, ylim=c(-6,6))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_G_mm18B_vs_eGFP, ylim=c(-4,4),main="G mm18B vs eGFP")
print(maplot)
NULL
maplot <- DESeq2::plotMA(res$group_G_mm18B_vs_eGFP, ylim=c(-5,5),main="G mm18B vs eGFP")
print(maplot)
NULL
#maplot <- DESeq2::plotMA(res$group_G_mm18B_vs_eGFP, ylim=c(-2,2))
#print(maplot)
## D72 ##
#maplot <- DESeq2::plotMA(res$group_D72_mm18B_vs_eGFP, ylim=c(-6,6))
#print(maplot)
maplot <- DESeq2::plotMA(res$group_D72_mm18B_vs_eGFP, ylim=c(-4,4),main="D72 mm18B vs eGFP")
print(maplot)
NULL
maplot <- DESeq2::plotMA(res$group_D72_mm18B_vs_eGFP, ylim=c(-5,5),main="D72 mm18B vs eGFP")
print(maplot)
NULL
#maplot <- DESeq2::plotMA(res$group_D72_mm18B_vs_eGFP, ylim=c(-2,2))
#print(maplot)
こちらで横軸を計算してプロット 20200914add (20200915ver)
Set_cutoff <- 10.0
## 各時刻の平均を計算し、normalized count > 10 を超えるものを抽出する。
#----- 使用するデータのみ取り出す ---# 20200914
norm_plotlist_all <- normalizedcount %>% gather("sample", "normalized",-(ens_gene)) %>% inner_join(def, by = "sample") %>% mutate(time=factor(time, c("G","D72")))%>% mutate(type=factor(type,c("mm18B","eGFP"))) %>% mutate(group=factor(group,c("eGFP_G","mm18B_G","eGFP_D72", "mm18B_D72")))
#norm_plotlist_sel <- norm_plotlist_all %>% filter(sample %in% def_select$sample) %>% mutate(time=factor(time, c("G","D72")))%>% mutate(type=factor(type,c("eGFP","mm18B"))) %>% mutate(group=factor(group,c("eGFP_G","mm18B_G","eGFP_D72", "mm18B_D72")))
#notm_plotlist_cutoff <- norm_plotlist_all %>% annotate() %>% group_by(ens_gene, ext_gene, Day, intact_CTX) %>% summarise(groupMean=mean(normalized)) %>% ungroup() %>% dplyr::select(ens_gene, ext_gene) %>% unique()
## "eGFP","mm18B"関係なく平均を求める
notm_plotlist_beforecutoff <- norm_plotlist_all %>% annotate() %>% group_by(ens_gene, ext_gene, time) %>% summarise(groupMean=mean(normalized))
`summarise()` regrouping output by 'ens_gene', 'ext_gene' (override with `.groups` argument)
#notm_plotlist_beforecutoff <- norm_plotlist_sel %>% annotate() %>% group_by(ens_gene, ext_gene, time) %>% summarise(groupMean=mean(normalized))
notm_plotlist_cutoff <- notm_plotlist_beforecutoff %>% filter(groupMean > Set_cutoff) %>% ungroup() %>% dplyr::select(ens_gene, ext_gene) %>% unique()
nrow(notm_plotlist_beforecutoff %>% ungroup() %>% dplyr::select(ens_gene, ext_gene) %>% unique()) #この値をMAplotのx軸に使用
[1] 23695
nrow(notm_plotlist_cutoff) #解析対象を絞る (後の全体のクラスタリングに使用)
[1] 8641
norm_plotlist_all %>% readr::write_csv("Norm_deftable_all_final200915.csv")
#norm_plotlist_sel %>% readr::write_csv("Norm_deftable_select_final200915.csv") #これをMAplotに使用する
notm_plotlist_beforecutoff %>% readr::write_csv("Norm_groupMean_select_final200915.csv")
notm_plotlist_cutoff %>% readr::write_csv("Norm_groupMean_select_cutoff10_final200915.csv")
nrow(norm_plotlist_all)
[1] 473900
#nrow(norm_plotlist_sel)
nrow(notm_plotlist_beforecutoff)
[1] 47390
nrow(notm_plotlist_cutoff)
[1] 8641
re_select_plot <- re_all %>% filter(aspect %in% c("group_G_mm18B_vs_eGFP","group_D72_mm18B_vs_eGFP")) %>% mutate(time=case_when(aspect=="group_G_mm18B_vs_eGFP"~"G",aspect=="group_D72_mm18B_vs_eGFP"~"D72",TRUE ~ "FALSE")) %>% left_join(notm_plotlist_beforecutoff) %>% mutate(time=factor(time, c("G","D72")))
Joining, by = c("ens_gene", "ext_gene", "time")
re_select_plot %>% readr::write_csv("./MAplot/C2C12_mm18B_vs_eGFP_DEG_time_MAplotdata.csv")
####
Daymean <- re_select_plot %>% group_by(time) %>% summarise(DayMean=mean(groupMean))
`summarise()` ungrouping output (override with `.groups` argument)
Mean_color <- "#B8860B"
Daymean
Allgene_num <- re_select_plot %>% dplyr::select(ens_gene) %>% unique() %>% nrow()
f_DEG_in <- function(x) x %>% filter(padj<0.1)
f_DEG_out <- function(x) x %>% filter((!(padj<0.1))|is.na(padj))
f_inFC_degin <- function(x) x %>% f_DEG_in %>% filter(!(abs(log2FoldChange) > 5.0))
f_inFC_degout <- function(x) x %>% f_DEG_out %>% filter(!(abs(log2FoldChange) > 5.0))
f_overFC_up_degin <- function(x) x %>% f_DEG_in %>% filter(log2FoldChange > 5.0) %>% mutate(log2FoldChange=5.0)
f_overFC_down_degin <- function(x) x %>% f_DEG_in %>% filter(log2FoldChange < -5.0) %>% mutate(log2FoldChange=-5.0)
f_overFC_up_degout <- function(x) x %>% f_DEG_out %>% filter(log2FoldChange > 5.0) %>% mutate(log2FoldChange=5.0)
f_overFC_down_degout <- function(x) x %>% f_DEG_out %>% filter(log2FoldChange < -5.0) %>% mutate(log2FoldChange=-5.0)
### 全て
re_select_plot %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
### DEG
re_select_plot %>% f_DEG_in %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_inFC_degin %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_overFC_up_degin %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_overFC_down_degin %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
### DEG 以外
re_select_plot %>% f_DEG_out %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_inFC_degout %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_overFC_up_degout %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
re_select_plot %>% f_overFC_down_degout %>% group_by(aspect) %>% summarise(n())
`summarise()` ungrouping output (override with `.groups` argument)
ddddddddd <- re_select_plot %>% f_DEG_in %>% mutate(FC_updown = case_when(log2FoldChange>0~"Up", log2FoldChange<0~"Down")) %>% mutate(FC_updown=factor(FC_updown,c("Up","Down"))) %>% arrange(time,FC_updown)
eeeeeeeee <- ddddddddd %>% group_by(aspect,time,FC_updown) %>% summarise(count=n())
`summarise()` regrouping output by 'aspect', 'time' (override with `.groups` argument)
gggglabel <- paste("C2C12 mm18B_vs_eGFP:", Allgene_num, "genes,",
"G:",eeeeeeeee$FC_updown[1],eeeeeeeee$count[1],",",eeeeeeeee$FC_updown[2],eeeeeeeee$count[2],
",","D72:",eeeeeeeee$FC_updown[3],eeeeeeeee$count[3],",",eeeeeeeee$FC_updown[4],eeeeeeeee$count[4],sep=" ")
print(gggglabel)
[1] "C2C12 mm18B_vs_eGFP: 23695 genes, G: Up 112 , Down 220 , D72: Up 1036 , Down 1022"
######
ggmaplot <- re_select_plot %>% ggplot(aes(groupMean,log2FoldChange))+geom_point(size=0.1, alpha = 0.5,data=f_DEG_out,color="#bdbdbd") + geom_abline(intercept=0,slope=0,colour="black",size=0.2) +geom_point(aes(groupMean,log2FoldChange),size=0.1,color="#ff0000", data=f_DEG_in) + scale_x_log10() + theme_bw() + theme(legend.position = "top") + ggtitle(gggglabel) + ylim(-5.0, 5.0) + facet_wrap(~time,ncol=1) + theme(axis.title = element_text(size=15),axis.text = element_text(size=10),axis.text.x = element_text(hjust = 0.5,vjust=1.0), legend.position = "right", strip.text=element_text(size=15),strip.background = element_blank(),title = element_text(size=5))
#+ scale_color_manual(values = c("#ff0000","#ff0000","#000000"))
ggsave(file="./MAplot/C2C12_mm18B_vs_eGFP_DEG_time_MAplot.pdf", plot = ggmaplot, width = 6, height = 8, dpi = 120)
plot(ggmaplot)
ggmaplot <- re_select_plot %>% ggplot(aes(groupMean,log2FoldChange))+geom_point(size=0.1, alpha = 0.5,data=f_DEG_out,color="#bdbdbd") + geom_abline(intercept=0,slope=0,colour="black",size=0.2) + geom_vline(data = Daymean, aes(xintercept=DayMean),colour=Mean_color,size=0.2,linetype="dashed") +geom_point(aes(groupMean,log2FoldChange),size=0.1,color="#ff0000", data=f_DEG_in) + scale_x_log10() + theme_bw() + theme(legend.position = "top") + ggtitle(gggglabel) + ylim(-5.0, 5.0) + facet_wrap(~time,ncol=1) + theme(axis.title = element_text(size=15),axis.text = element_text(size=10),axis.text.x = element_text(hjust = 0.5,vjust=1.0), legend.position = "right", strip.text=element_text(size=15),strip.background = element_blank(),title = element_text(size=5))
#+ scale_color_manual(values = c("#ff0000","#ff0000","#000000"))
ggsave(file="./MAplot/C2C12_mm18B_vs_eGFP_DEG_time_MAplot_Mean.pdf", plot = ggmaplot, width = 6, height = 8, dpi = 120)
plot(ggmaplot)
######
## FC over5も出力
ggmaplot <- re_select_plot %>% ggplot(aes(groupMean,log2FoldChange))+geom_point(size=0.1, alpha = 0.5,data=f_inFC_degout,color="#bdbdbd") +geom_point(size=0.2, alpha = 0.5,shape=2,data=f_overFC_up_degout,color="#bdbdbd")+geom_point(size=0.2, alpha = 0.5,shape=6,data=f_overFC_down_degout,color="#bdbdbd") + geom_abline(intercept=0,slope=0,colour="black",size=0.2) +geom_point(aes(groupMean,log2FoldChange),size=0.1,color="#ff0000", data=f_inFC_degin) +geom_point(size=0.3, shape=2,data=f_overFC_up_degin,color="#ff0000") +geom_point(size=0.3, shape=6,data=f_overFC_down_degin,color="#ff0000") + scale_x_log10() + theme_bw() + theme(legend.position = "top") + ggtitle(gggglabel) + ylim(-5.0, 5.0) + facet_wrap(~time,ncol=1) + theme(axis.title = element_text(size=15),axis.text = element_text(size=10),axis.text.x = element_text(hjust = 0.5,vjust=1.0), legend.position = "right", strip.text=element_text(size=15),strip.background = element_blank(),title = element_text(size=5))
ggsave(file="./MAplot/C2C12_mm18B_vs_eGFP_DEG_time_MAplot_over5.pdf", plot = ggmaplot, width = 6, height = 8, dpi = 120)
plot(ggmaplot)
ggmaplot <- re_select_plot %>% ggplot(aes(groupMean,log2FoldChange))+geom_point(size=0.1, alpha = 0.5,data=f_inFC_degout,color="#bdbdbd") +geom_point(size=0.2, alpha = 0.5,shape=2,data=f_overFC_up_degout,color="#bdbdbd")+geom_point(size=0.2, alpha = 0.5,shape=6,data=f_overFC_down_degout,color="#bdbdbd") + geom_abline(intercept=0,slope=0,colour="black",size=0.2) + geom_vline(data = Daymean, aes(xintercept=DayMean),colour=Mean_color,size=0.2,linetype="dashed") +geom_point(aes(groupMean,log2FoldChange),size=0.1,color="#ff0000", data=f_inFC_degin) +geom_point(size=0.3, shape=2,data=f_overFC_up_degin,color="#ff0000") +geom_point(size=0.3, shape=6,data=f_overFC_down_degin,color="#ff0000") + scale_x_log10() + theme_bw() + theme(legend.position = "top") + ggtitle(gggglabel) + ylim(-5.0, 5.0) + facet_wrap(~time,ncol=1) + theme(axis.title = element_text(size=15),axis.text = element_text(size=10),axis.text.x = element_text(hjust = 0.5,vjust=1.0), legend.position = "right", strip.text=element_text(size=15),strip.background = element_blank(),title = element_text(size=5))
ggsave(file="./MAplot/C2C12_mm18B_vs_eGFP_DEG_time_MAplot_Mean_over5.pdf", plot = ggmaplot, width = 6, height = 8, dpi = 120)
plot(ggmaplot)
20191204追加 mouse, CTX, 2群間 (Up, Down) の結果GOを参考に。 (191120ver)
table_degcluster <- re %>% filter(aspect=="group_D72_mm18B_vs_eGFP") %>% arrange(ens_gene) %>% dplyr::select(ens_gene,ext_gene,log2FoldChange)
#file_degcluster <- "/home/guestA/o70578a/akuwakado/kuwakado/scCELSeq2/Iwasaki_0386_C2C12_H3mm18/Final_Last_Rserver_200523/Iwasaki_0386re_C2C12_H3mm18_noumi_results__final200523.csv"
#table_degcluster <- readr::read_csv(file_degcluster) %>% filter(aspect=="group_D72_mm18B_vs_eGFP") %>% arrange(ens_gene) %>% dplyr::select(ens_gene,ext_gene,log2FoldChange)
# plus
table_degcluster %>% filter(log2FoldChange>0) %>% summarise(plus = dplyr::n())
# minus
table_degcluster %>% filter(log2FoldChange<0) %>% summarise(minus = dplyr::n())
##### FDR setting ######
gofdr <- 0.1
cluster_num <- 2
20191106修正を参考に Up Down用
(20200915, defaltのGOのplotの調子が悪い)
library(clusterProfiler)
library(org.Mm.eg.db)
#-------------#
filename_list <- "./clusterProfile/Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_"
filename_csv <- "./clusterProfile/Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_UpDown"
#-------------#
cluster_list <- as.list(NA) #初期化
for (i in 1:2) {
pre_list <- as.list(NA) #初期化
if (i == 1) {
pre_list <- table_degcluster %>% filter(log2FoldChange>0) %>% dplyr::select(ens_gene) %>% as.list()
#pre_list <- table_degcluster %>% filter(log2FoldChange=="Up") %>% dplyr::select(ens_gene) %>% as.list()
names(pre_list) <- paste("ENSEMBL",as.character("up"),sep="_")
}
else {
pre_list <- table_degcluster %>% filter(log2FoldChange<0) %>% dplyr::select(ens_gene) %>% as.list()
names(pre_list) <- paste("ENSEMBL",as.character("down"),sep="_")
}
if (i == 1) {
cluster_list <- pre_list
}
else cluster_list <- c(cluster_list, pre_list)
}
for (i in 1:2) {
print(paste(i, cluster_list[[i]] %>% tibble::enframe(name = NULL) %>% nrow(), sep=", "))
#print(paste(i, cluster_list[[i]] %>% as_tibble() %>% nrow(), sep=", ")) #Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
pre_ego_BP <- enrichGO(gene = cluster_list[[i]],
OrgDb = "org.Mm.eg.db",
keyType = 'ENSEMBL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = gofdr, qvalueCutoff = 1.0)
## pvalue < qvalue < p.adjust ##
# qvalueCutoff = 0.3 qvalueCutoff = 0.2 , qvalueCutoff = 1.0
if (i == 1) {
table_ego_BP <- data.frame(pre_ego_BP) %>% mutate(cluster="Up") # リスト型からデータフレームへ変換
#---- plot ---#
#BPplot <- dotplot(pre_ego_BP, showCategory=30, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
#print(BPplot)
#ggsave(BPplot,file=paste(filename_list,"Up",".png",sep=""), width = 8, height = 12, dpi = 120)
}
else {
table_ego_BP <- table_ego_BP %>% bind_rows(data.frame(pre_ego_BP) %>% mutate(cluster="Down"))
#---- plot ---#
#BPplot <- dotplot(pre_ego_BP, showCategory=30, orderBy = "Count") #clusterProfile の機能で図を描く(191106修正) wrong orderBy parameter; set to default `orderBy = "x"`
#print(BPplot)
#ggsave(BPplot,file=paste(filename_list,"Down",".png",sep=""), width = 8, height = 12, dpi = 120)
}
}
[1] "1, 1036"
[1] "2, 1022"
#------#
# データはtable_ego_BPに。
print(table_ego_BP %>% group_by(cluster) %>% summarise())
`summarise()` ungrouping output (override with `.groups` argument)
#------------------------------------------------------#
# テーブルを保存
table_ego_BP1 <- table_ego_BP %>% mutate(cluster=factor(cluster,c("Up","Down"))) %>% arrange(cluster,desc(Count)) #191106
readr::write_csv(table_ego_BP1,paste(filename_csv,".csv",sep=""))
# 先のテーブルのgeneIDをgene nameに置換する。(20191025)
tablego <- table_ego_BP1 %>% mutate(gene_name=geneID) %>% dplyr::select(-(qvalue))
for (i in 1:nrow(table_degcluster)) {
tablego <- tablego %>% mutate(gene_name=gsub(gene_name, pattern=table_degcluster$ens_gene[i], replacement=table_degcluster$ext_gene[i], ignore.case = TRUE))
}
print(tablego)
readr::write_csv(tablego,paste(filename_csv,"_genename.csv",sep=""))
#------------------------------------------------------#
#GOのtermの数
print(tablego %>% group_by(cluster) %>% summarise(count_GO=n()))
`summarise()` ungrouping output (override with `.groups` argument)
## 変更 ##
table_ego_BP_2gunfdr0p1_cluster <- tablego # 自由に変更する
#--- メモ ----#
#tableggg <- table_ego_clustercluster
#colm <- tableggg$geneID
#for (i in 1:88) {
# colm <- sub(rrres_cluster3$ens_gene[i], rrres_cluster3$ext_gene[i], colm)
#}
#print(colm)
# Benjamini correction を p-adjust として使用する
# figのタイトルを修正 (20191213)
file_BP_plot <- "./clusterProfile/BPfdr0p1__Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_fin200915.pdf"
file_BP_plot_muscle <- "./clusterProfile/BPfdr0p1__Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_2gunfdr0p1_log2FoldChange_muscleonly_fin200915.pdf"
#--------------------#
BP_matome <- tablego
rowlength <- BP_matome %>% group_by(Description) %>% summarise() %>% nrow()
`summarise()` ungrouping output (override with `.groups` argument)
BP_plot <- BP_matome %>% filter(p.adjust<fdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue")+ xlab("C2C12 (fdr 0.1)") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(fdr),sep=""))
print(BP_plot)
ggsave(plot=BP_plot,file=file_BP_plot, width = 10, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)
#---muscle関連のみ
BP_matome_muscle <- BP_matome %>% filter(grepl("muscle", Description)|grepl("myo", Description))
rowlength <- BP_matome_muscle %>% group_by(Description) %>% summarise() %>% nrow()
`summarise()` ungrouping output (override with `.groups` argument)
BP_plot_muscle <- BP_matome_muscle %>% filter(p.adjust<fdr) %>% ggplot(aes(x=cluster, y=reorder(Description,Count), size=Count, fill=p.adjust)) + geom_point(shape = 21) + theme(strip.text = element_text(size = 10)) + theme_minimal() + theme(strip.text = element_text(size = 12),axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_gradient(low = "red" , high = "blue") + xlab("C2C12 (fdr 0.1) (muscle)") + ylab("GO Description (BP)") + labs(fill=paste("p.adjust (BH) < ",as.character(fdr),sep=""))
print(BP_plot_muscle)
ggsave(BP_plot_muscle,file=file_BP_plot_muscle, width = 8, height = (5+rowlength/6), dpi = 120,limitsize = FALSE)
NA
NA
# x=pvalue, y=p.adjust
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=p.adjust, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(fdr),sep=""))
print(plottt)
# x=pvalue, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=pvalue, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(fdr),sep=""))
print(plottt)
# x=p.adjust, y=qvalue
plottt <- table_ego_BP %>% ggplot(aes(x=p.adjust, y=qvalue, size=Count)) + geom_point()+geom_abline(intercept=0,slope=1.0,linetype="dashed",colour="blue") + xlim(0,NA) + ylim(0,NA) + ggtitle(label=paste("p.adjust (BH) < ",as.character(fdr),sep=""))
print(plottt)
## pvalue < qvalue < p.adjust ##
# pvalue < p.adjust
# pvalue < qvalue
# qvalue < p.adjust
#---------------------#
#[BBRB-seq_0438_QC_tmpl_v6_noumi_190515-H3mm18KO_CTX_S2-Day0_S3_fdr0p2ver_and_LRT_191024 (umi補正なし,fdr0.2) (TPM 190722ver) (190924を元に) (190627-1024)]を参考にした。
#pvalueCutoff
#pvalue cutoff on enrichment tests to report
#pAdjustMethod
#one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"
#qvalueCutoff
#qvalue cutoff on enrichment tests to report as significant. Tests must pass i) pvalueCutoff on unadjusted pvalues, ii) pvalueCutoff on adjusted pvalues and iii) qvalueCutoff on qvalues to be reported.
# 設定(pvalueCutoff = 0.1, qvalueCutoff = 0.2)だと、p値<0.1, p.adjust値<0.1, q値<0.2 になっている。
plot Top GO (20200915 add)
tablego_top <- tablego %>% group_by(cluster) %>% arrange(desc(Count), p.adjust, pvalue) %>% mutate(rank=row_number()) %>% filter(rank<=10)
print(tablego_top)
filename <- paste(dirname(filename_csv),"/Top10__",basename(filename_csv),"_genename.csv",sep="")
print(filename)
[1] "./clusterProfile/Top10__Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_UpDown_genename.csv"
readr::write_csv(tablego_top,filename)
# C2 Top10
#file_c2 <- "/home/akuwakado/makeplot_18project/Inputfile/Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_UpDown_genename.csv"
#datac2 <- readr::read_csv(file_c2) %>% group_by(cluster) %>% arrange(desc(Count), p.adjust, pvalue) %>% mutate(rank=row_number()) %>% filter(rank<=10)
#print(datac2)
#filename <- paste("./Outputfile/","Top10__",basename(file_c2),sep="")
#print(filename)
#datac2 %>% readr::write_csv(filename)
一度に描画
plot_tablego_top <- tablego_top %>% ungroup() %>% mutate(cluster=factor(cluster,c("Up","Down"))) %>% group_by(cluster) %>% dplyr::mutate(GeneRatio1=GeneRatio) %>% tidyr::separate(col=GeneRatio1,sep="/",into=c("count","BP_genesize")) %>% mutate(BP_genesize=as.integer(BP_genesize),Gene_ratio=Count/BP_genesize) %>% dplyr::select(-count)
plot_tablego_top %>% summarise(max=max(Gene_ratio),min=min(Gene_ratio))
`summarise()` ungrouping output (override with `.groups` argument)
plot_tablego_top %>% summarise(max=max(Count),min=min(Count))
`summarise()` ungrouping output (override with `.groups` argument)
plot_tablego_top_2 <- plot_tablego_top %>% mutate(new_Description = paste("(",cluster,") ",Description,sep=""))
#xmax=0.175
#xmin=0.085
#all_break <- c(3,6,9,12,15)
xmax=0.085
xmin=0.050
#all_break <- c(55,60,65,70,75,80,85)
all_break <- c(50,55,60,65,70,75) #+ scale_size_area(breaks=all_break)
sort_plot_tablego_top_2 <- plot_tablego_top_2 %>% arrange(desc(rank))
gggU <- plot_tablego_top_2 %>% arrange(desc(rank)) %>% mutate(new_Description =factor(new_Description,sort_plot_tablego_top_2$new_Description)) %>% ggplot(aes(x=Gene_ratio, y=new_Description, fill=p.adjust)) + geom_point(aes(size=Count),shape = 21) + theme_bw() + theme(legend.position = "right",legend.box="horizontal",strip.text = element_text(size = 12),legend.title = element_text(size = 14),axis.title = element_text(size = 14),legend.text = element_text(size = 12),axis.text = element_text(size = 12),axis.text.x = element_text(vjust = 1),strip.background = element_blank()) + scale_fill_gradient(low = "red" , high = "blue",limits = c(0, 0.1)) + xlim(xmin,xmax) + facet_wrap(~cluster,scales = "free_y",ncol=1)
gggU0 <- plot_tablego_top_2 %>% arrange(desc(rank)) %>% mutate(new_Description =factor(new_Description,sort_plot_tablego_top_2$new_Description)) %>% ggplot(aes(x=Gene_ratio, y=new_Description, fill=p.adjust)) + geom_point(aes(size=Count),shape = 21) + theme_bw() + theme(legend.position = "right",legend.box="horizontal",strip.text = element_blank(),legend.title = element_blank(),axis.title = element_blank(),legend.text = element_blank(),axis.text = element_blank(),axis.text.x = element_blank(),strip.background = element_blank()) + scale_fill_gradient(low = "red" , high = "blue",limits = c(0, 0.1)) + xlim(xmin,xmax) + facet_wrap(~cluster,scales = "free_y",ncol=1)
print(gggU)
print(gggU0)
filename <- paste(dirname(filename_csv),"/Top10__",basename(filename_csv),"_plot.pdf",sep="")
print(filename)
[1] "./clusterProfile/Top10__Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_UpDown_plot.pdf"
ggsave(gggU,file=filename, width = 8, height = 7, dpi = 120,limitsize = FALSE)
filename <- paste(dirname(filename_csv),"/Top10__",basename(filename_csv),"_plot_none.pdf",sep="")
print(filename)
[1] "./clusterProfile/Top10__Iwasaki_0386re_C2C12_H3mm18_2gunfdr0p1_log2FoldChange_BPfdr0p1_generatio_UpDown_plot_none.pdf"
ggsave(gggU0,file=filename, width = 4, height = 7, dpi = 120,limitsize = FALSE)
#ggsave(gggU,file=paste("./Outputfile/","C2C12_H3mm18_CEL0386re__clusterAll_Top10_BPfdr0p1_plot.pdf",sep=""), width = 8, height = 7, dpi = 120,limitsize = FALSE)
#ggsave(gggU0,file=paste("./Outputfile/","C2C12_H3mm18_CEL0386re__clusterAll_Top10_BPfdr0p1_plot_none.pdf",sep=""), width = 4, height = 7, dpi = 120,limitsize = FALSE)
Top GO term heatmap
(GO 20200915ver)
GOdata_top10 <- tablego_top
raa <- strsplit(GOdata_top10$geneID , "/")
for (i in 1:nrow(GOdata_top10)) {
rbbb <- data.frame(Description=rep(GOdata_top10$Description[i],length(raa[[i]])), cluster=rep(GOdata_top10$cluster[i],length(raa[[i]])), ens_gene=raa[[i]])
if (i == 1) {
Top10GO <- rbbb
}
else {
Top10GO <- Top10GO %>% bind_rows(rbbb)
}
}
Top10GO_t <- Top10GO %>% as_tibble() %>% mutate(Description=as.character(Description),ens_gene=as.character(ens_gene),cluster=as.character(cluster))
Top10GO_s <- Top10GO_t %>% group_by(ens_gene,cluster) %>% summarise(Des=paste(Description, collapse = ", ")) %>% ungroup()
`summarise()` regrouping output by 'ens_gene' (override with `.groups` argument)
readr::write_csv(Top10GO_s, "./clusterProfile/heatmap/CEL0386noumi_C2C12_H3mm18__DEG_GOtop10_genelist__20200915.csv")
Top10GO_s_down <- Top10GO_s %>% filter(cluster=="Down")
Top10GO_s_up <- Top10GO_s %>% filter(cluster=="Up")
zscore_type_Top10GO_deg <- zscore_type %>% filter(ens_gene %in% Top10GO_s$ens_gene)
zscore_type_Top10GO_up <- zscore_type %>% filter(ens_gene %in% Top10GO_s_up$ens_gene)
zscore_type_Top10GO_down <- zscore_type %>% filter(ens_gene %in% Top10GO_s_down$ens_gene)
zscore_type_housekeep <- zscore_type %>% filter(ext_gene %in% c("Rpl27","Actb","Gapdh"))
re %>% filter(ext_gene %in% c("Rpl27","Actb","Gapdh"))
NA
breaksList = seq(-3.0, 3.0, 0.1)
heatmapcols <- colorRampPalette(rev(brewer.pal(n=7,name="RdYlBu")))(length(breaksList))
def_select <- def %>% dplyr::filter(group %in% c("mm18B_G", "eGFP_G","mm18B_D72", "eGFP_D72")) %>% mutate(group=factor(group,c("eGFP_G","mm18B_G","eGFP_D72", "mm18B_D72"))) %>% arrange(group,sample)
#================================================#
#anno_row <- data.frame(Chr = zscore_BRBDEG$chr, cluster = zscore_BRBDEG$cluster)
#rownames(anno_row) <- zscore_BRBDEG$ext_gene
#anno_col <- data.frame(Seq = def_list_select$seq, Dox = def_list_select$type)
#rownames(anno_col) <- def_list_select$sample
#================================================#
zscore_mat_down <- zscore_type_Top10GO_down %>% dplyr::select(all_of(def_select$sample)) %>% as.matrix()
rownames(zscore_mat_down) <- zscore_type_Top10GO_down$ext_gene
heat_title_down <- paste("Top10GO down: ",nrow(zscore_type_Top10GO_down)," Total: ",nrow(zscore_type),sep="")
zscore_mat_up <- zscore_type_Top10GO_up %>% dplyr::select(all_of(def_select$sample)) %>% as.matrix()
rownames(zscore_mat_up) <- zscore_type_Top10GO_up$ext_gene
heat_title_up <- paste("Top10GO up: ",nrow(zscore_type_Top10GO_up)," Total: ",nrow(zscore_type),sep="")
zscore_mat_keep <- zscore_type_housekeep %>% dplyr::select(all_of(def_select$sample)) %>% as.matrix()
rownames(zscore_mat_keep) <- zscore_type_housekeep$ext_gene
heat_title_keep <- paste("Housekeeping: ",nrow(zscore_type_housekeep)," Total: ",nrow(zscore_type),sep="")
#================================================#
z_heat1 <- pheatmap::pheatmap(zscore_mat_down,
main = heat_title_down,
scale = "none",
#cluster_rows = FALSE, #peak (clusterで) (領域名)
#cluster_cols = FALSE, #samplrf
#cluster_rows = FALSE, #peak (clusterで) (領域名)
cluster_rows = TRUE, #peak (clusterで) (領域名)
cluster_cols = FALSE, #samplrf
#show_rownames = TRUE, #peak名 (領域名)
show_rownames = TRUE, #peak名 (領域名)
show_colnames = TRUE, #sample_position名 (モチーフ名)
#annotation_names_col = TRUE,
#show_rownames = TRUE, #peak名
#show_colnames = TRUE, #sample_position名
#annotation_names_col =TRUE,
#gaps_row=table_gap$nonoo,
#gaps_col=c(3,6,8,11),
#na_col ="white",
gaps_row=c(80),
gaps_col= seq(5, 20, 5),
#gaps_col= seq(2, 8*2, 2),
#gaps_col= c(2,4,6,8,10,12,14), # c(3, 6, 9, 12), #gaps_col= seq(2, 6*2, 2),
#gaps_col=c(200,400,600,800,1000,1200,1400,1600,1800,2000,2200,2400), gaps_col= seq(200, 12*200, 200),
#cellheight = nrow(mymatrix2_1)*(0.25),
#cellwidth = 20,
#cellheight = 400/nrow(mymatrix1),
#cellheight = 0.005, #cellheight = 1.5,
#cellheight = 0.010, #cellheight = 1.5,
cellheight = 4.0, #cellheight = 1.5,
cellwidth = 40, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 8, #cellwidth = 3,
#cellheight = 6, #cellheight = 1.5,
#cellheight = 6, #cellheight = 1.5,
#cellwidth = 7, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 15, #cellwidth = 3,
#border_color="gray", #border_color="#000000",
border_color=NA,
#fontsize_row = 6,
#annotation_row = mixed_name,
#annotation_row = anno_row,
#annotation_col = anno_col,
# cutree_rows = 6,
#cutree_rows = 10,
#display_numbers = TRUE,
#number_format = "%1.2e",
#number_color = "black",
#annotation_col = annotdf_sample
fontsize_col = 5,
fontsize_row = 3.7,
#fontsize_row = 0.6,
legend=TRUE,
#legend_breaks = leg_break,
#legend_labels = as.character(leg_break),
breaks = breaksList,
color = heatmapcols
#annotation_colors = mycolors
#treeheight_row = 10
)
ggsave(plot=z_heat1,file="./clusterProfile/heatmap/heatmap_down.pdf", width = 15, height = 15, dpi = 360, limitsize = FALSE)
#================================================#
z_heat1 <- pheatmap::pheatmap(zscore_mat_up,
main = heat_title_up,
scale = "none",
#cluster_rows = FALSE, #peak (clusterで) (領域名)
#cluster_cols = FALSE, #samplrf
#cluster_rows = FALSE, #peak (clusterで) (領域名)
cluster_rows = TRUE, #peak (clusterで) (領域名)
cluster_cols = FALSE, #samplrf
#show_rownames = TRUE, #peak名 (領域名)
show_rownames = TRUE, #peak名 (領域名)
show_colnames = TRUE, #sample_position名 (モチーフ名)
#annotation_names_col = TRUE,
#show_rownames = TRUE, #peak名
#show_colnames = TRUE, #sample_position名
#annotation_names_col =TRUE,
#gaps_row=table_gap$nonoo,
#gaps_col=c(3,6,8,11),
#na_col ="white",
gaps_row=c(80),
gaps_col= seq(5, 20, 5),
#gaps_col= seq(2, 8*2, 2),
#gaps_col= c(2,4,6,8,10,12,14), # c(3, 6, 9, 12), #gaps_col= seq(2, 6*2, 2),
#gaps_col=c(200,400,600,800,1000,1200,1400,1600,1800,2000,2200,2400), gaps_col= seq(200, 12*200, 200),
#cellheight = nrow(mymatrix2_1)*(0.25),
#cellwidth = 20,
#cellheight = 400/nrow(mymatrix1),
#cellheight = 0.005, #cellheight = 1.5,
#cellheight = 0.010, #cellheight = 1.5,
cellheight = 4.0, #cellheight = 1.5,
cellwidth = 40, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 8, #cellwidth = 3,
#cellheight = 6, #cellheight = 1.5,
#cellheight = 6, #cellheight = 1.5,
#cellwidth = 7, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 15, #cellwidth = 3,
#border_color="gray", #border_color="#000000",
border_color=NA,
#fontsize_row = 6,
#annotation_row = mixed_name,
#annotation_row = anno_row,
#annotation_col = anno_col,
# cutree_rows = 6,
#cutree_rows = 10,
#display_numbers = TRUE,
#number_format = "%1.2e",
#number_color = "black",
#annotation_col = annotdf_sample
fontsize_col = 5,
fontsize_row = 3.7,
#fontsize_row = 0.6,
legend=TRUE,
#legend_breaks = leg_break,
#legend_labels = as.character(leg_break),
breaks = breaksList,
color = heatmapcols
#annotation_colors = mycolors
#treeheight_row = 10
)
#ggsave(plot=z_heat1,file="./H3K27me3_ChIL_heatmap.png", width = 8, height = 15, dpi = 360, limitsize = FALSE)
ggsave(plot=z_heat1,file="./clusterProfile/heatmap/heatmap_up.pdf", width = 15, height = 20, dpi = 360, limitsize = FALSE)
#================================================#
z_heat1 <- pheatmap::pheatmap(zscore_mat_keep,
main = heat_title_keep,
scale = "none",
#cluster_rows = FALSE, #peak (clusterで) (領域名)
#cluster_cols = FALSE, #samplrf
#cluster_rows = FALSE, #peak (clusterで) (領域名)
cluster_rows = TRUE, #peak (clusterで) (領域名)
cluster_cols = FALSE, #samplrf
#show_rownames = TRUE, #peak名 (領域名)
show_rownames = TRUE, #peak名 (領域名)
show_colnames = TRUE, #sample_position名 (モチーフ名)
#annotation_names_col = TRUE,
#show_rownames = TRUE, #peak名
#show_colnames = TRUE, #sample_position名
#annotation_names_col =TRUE,
#gaps_row=table_gap$nonoo,
#gaps_col=c(3,6,8,11),
#na_col ="white",
gaps_row=c(80),
gaps_col= seq(5, 20, 5),
#gaps_col= seq(2, 8*2, 2),
#gaps_col= c(2,4,6,8,10,12,14), # c(3, 6, 9, 12), #gaps_col= seq(2, 6*2, 2),
#gaps_col=c(200,400,600,800,1000,1200,1400,1600,1800,2000,2200,2400), gaps_col= seq(200, 12*200, 200),
#cellheight = nrow(mymatrix2_1)*(0.25),
#cellwidth = 20,
#cellheight = 400/nrow(mymatrix1),
#cellheight = 0.005, #cellheight = 1.5,
#cellheight = 0.010, #cellheight = 1.5,
cellheight = 4.0, #cellheight = 1.5,
cellwidth = 40, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 8, #cellwidth = 3,
#cellheight = 6, #cellheight = 1.5,
#cellheight = 6, #cellheight = 1.5,
#cellwidth = 7, #cellwidth = 3,
#cellheight = 0.7, #cellheight = 1.5,
#cellwidth = 15, #cellwidth = 3,
#border_color="gray", #border_color="#000000",
border_color=NA,
#fontsize_row = 6,
#annotation_row = mixed_name,
#annotation_row = anno_row,
#annotation_col = anno_col,
# cutree_rows = 6,
#cutree_rows = 10,
#display_numbers = TRUE,
#number_format = "%1.2e",
#number_color = "black",
#annotation_col = annotdf_sample
fontsize_col = 5,
fontsize_row = 3.7,
#fontsize_row = 0.6,
legend=TRUE,
#legend_breaks = leg_break,
#legend_labels = as.character(leg_break),
breaks = breaksList,
color = heatmapcols
#annotation_colors = mycolors
#treeheight_row = 10
)
#ggsave(plot=z_heat1,file="./H3K27me3_ChIL_heatmap.png", width = 8, height = 15, dpi = 360, limitsize = FALSE)
ggsave(plot=z_heat1,file="./clusterProfile/heatmap/heatmap_housekeeping.pdf", width = 15, height = 15, dpi = 360, limitsize = FALSE)
# 2019 12月作成
norm_plotlist_all_name <- norm_plotlist_all %>% inner_join(e2g, by = "ens_gene")
readr::write_csv(norm_plotlist_all_name,"Norm_deftable_all_final200915__genename.csv") #191206 追加
nrow(norm_plotlist_all)
[1] 473900
nrow(norm_plotlist_all_name)
[1] 473900
norm_plotlist_all “Norm_deftable_all_final200915.csv” を使う
plotgene_list <- c("Col3a1","Acta1","Myog","Myod1","Tnnt1","Tnnt2","Tnnt3","Csrp3","Myh3","Ckm","Rpl27","Actb","Gapdh","Slc38a2","Inhba","Myh9","Rpl13","Nsdhl")
length(plotgene_list)
[1] 18
#"Myh3","Ckm","Acta1","Tnnt2","Actb","Csrp3"
re %>% filter(aspect=="group_D72_mm18B_vs_eGFP") %>% filter(ext_gene %in% plotgene_list) %>% arrange(log2FoldChange)
#======== Change every data ここで順番を変更 ========#
#-------#
nbl <- norm_plotlist_all_name %>% filter(ext_gene %in% plotgene_list)
#====================================================#
f_gene_norm <- function(x) x %>% group_by(group, type, time, ext_gene) %>% summarise(avg=mean(normalized),se=sd(normalized)/sqrt(length(normalized))) %>% ungroup()
#----#
nbl %>% group_by(group, type, time) %>% summarise()
`summarise()` regrouping output by 'group', 'type' (override with `.groups` argument)
#----#
#face="italic"
#, axis.text.x = element_text(angle = 45, hjust = 1) #X軸のラベルを傾ける場合
#, axis.text.x = element_text(hjust = 0.5) #X軸のラベルを水平にする場合
### point ###
gggggpp <- ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",nrow=3)+geom_line(size=1.0, aes(x=time,y=avg,colour=type),data=f_gene_norm)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top", plot.title=element_text(size=16))+ggsci::scale_color_npg() + ylab("normalized count")
file_path <- "./normCount/normCount_plot_final200915_plot1.pdf"
print(file_path)
[1] "./normCount/normCount_plot_final200915_plot1.pdf"
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 10)
print(gggggpp)
gggggpp <- ggplot(nbl,aes(time,normalized,group=type,colour=type))+geom_point(size=2)+facet_wrap(~ext_gene,scale="free_y",nrow=3)+geom_smooth(se=FALSE)+theme_bw() + ylim(0,NA) + theme(axis.text=element_text(hjust = 1, size=16), axis.text.x = element_text(hjust = 0.5), axis.title=element_text(size=16),axis.title.x = element_blank(), legend.title=element_text(size=16), legend.text = element_text(size=16), strip.background = element_blank(), strip.text.x = element_text(size=20,face="italic"), legend.position = "top", plot.title=element_text(size=16))+ggsci::scale_color_npg() + ylab("normalized count")
file_path <- "./normCount/normCount_plot_final200915_plot1_smooth.pdf"
print(file_path)
[1] "./normCount/normCount_plot_final200915_plot1_smooth.pdf"
ggsave(file=file_path, plot = gggggpp, dpi = 100, width = 12, height = 9)
TPMで作図 (191203修正)
# カウント0も表示するように変更 (20190722)
tpm <- umi %>% group_by(sample) %>% mutate(sample_total=sum(count),TPM=count/sum(count)*1e6) %>% ungroup
tpm_zero <- tpm %>% select(sample,ens_gene,TPM) %>% spread(sample,TPM,fill=0) %>% gather(sample, TPM, -ens_gene) #カウント0のサンプルは0を入れる 20190722追加して修正
tpm_def <- def %>% select(-count) %>% dplyr::inner_join(tpm_zero, by = "sample") #tpmをtpm_zeroに修正、20190722修正
matome <- tpm_def %>% inner_join(e2g, by = "ens_gene")
readr::write_csv(matome,"Iwasaki_0386re_C2C12_H3mm18_umi_TPM__final20915.csv") #191203 追加
#-- 確認 --#
umi %>% group_by(sample) %>% summarise(sum(count)) # 確認
`summarise()` ungrouping output (override with `.groups` argument)
tpm_zero %>% group_by(sample) %>% summarise(sum(TPM)) # 確認
`summarise()` ungrouping output (override with `.groups` argument)
head(matome)
#-- 覚書 --#
#tpm %>% filter(ens_gene=="ENSMUSG00000043090") #17個
#tpm_zero %>% filter(ens_gene=="ENSMUSG00000043090") #32個
# -- 旧ver ---
#tpm <- umi %>% group_by(sample) %>% mutate(sample_total=sum(count),TPM=count/sum(count)*1e6) %>% ungroup
#umi %>% group_by(sample) %>% summarise(sum(count))
#tpm %>% group_by(sample) %>% summarise(sum(TPM))
#tpm_def <- def %>% select(-count) %>% dplyr::inner_join(tpm, by = "sample")